Top 10 Events#

Hide code cell source
import warnings
warnings.filterwarnings("ignore")

import os
import os.path as op
import sys
import folium

import numpy as np
import pandas as pd

sys.path.append("../../../../indicators_setup")
from ind_setup.plotting_int import plot_timeseries_interactive
from ind_setup.colors import get_df_col
from ind_setup.tables import plot_df_table, get_data_metrics, table_temperature_summary

sys.path.append("../../../functions")
from data_downloaders import GHCN

Define location and variables of interest#

# Country whose GHCN stations are looked up further down.
country = "Palau"
# Variable codes of interest; the same codes are requested from GHCN below.
vars_interest = ["TMIN", "TMAX"]

Get Data#

GHCN-Daily documentation: https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/doc/GHCND_documentation.pdf

# When True the station data is re-downloaded; when False the cached pickle
# under path_data is read instead.
update_data = False

# Locations relative to this notebook.
path_data = "../../../data"
path_figs = "../../../matrix_cc/figures"

Using Koror Station#

Analysis of how the daily maximum and minimum temperatures are changing over time.
The difference between these two variables shows how the daily temperature range (the daily variability) is changing.

Hide code cell source
# The station inventory is only queried when a data refresh is requested.
if update_data:
    df_country = GHCN.get_country_code(country)
    # Pull the country code out once; it is printed and used as an ID prefix.
    country_code = df_country["Code"].values[0]
    print(f'The GHCN code for {country} is {country_code}')

    df_stations = GHCN.download_stations_info()
    # Keep the stations whose ID starts with the country code.
    is_country_station = df_stations['ID'].str.startswith(country_code)
    df_country_stations = df_stations[is_country_station]
    print(f'There are {len(df_country_stations)} stations in {country}')
Hide code cell source
# Build the station temperature table: download it from GHCN-Daily when a
# refresh is requested, otherwise read the pickle cached by a previous run.
cache_file = op.join(path_data, 'GHCN_surface_temperature.pkl')
if update_data:
    GHCND_dir = 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/'
    # Named station_id rather than id so the builtin id() is not shadowed.
    station_id = 'PSW00040309'  # Koror Station
    # Select the station row once and reuse it for both variables.
    df_station = df_country_stations.loc[df_country_stations['ID'] == station_id]
    dict_min = GHCN.extract_dict_data_var(GHCND_dir, 'TMIN', df_station)[0][0]
    dict_max = GHCN.extract_dict_data_var(GHCND_dir, 'TMAX', df_station)[0][0]
    # Join both series side by side and drop rows where either value is missing.
    st_data = pd.concat([dict_min['data'], dict_max['data']], axis=1).dropna()
    st_data['diff'] = st_data['TMAX'] - st_data['TMIN']
    st_data['TMEAN'] = (st_data['TMAX'] + st_data['TMIN']) / 2
    st_data.to_pickle(cache_file)
else:
    st_data = pd.read_pickle(cache_file)
# Summarise st_data with table_temperature_summary and render the transposed
# result with plot_df_table.
df = table_temperature_summary(st_data)
fig = plot_df_table(
    df.T,
    figsize=(500, 340),
)
../../../_images/7ba2bb71b306790603b38d62937f90338c95b25134072785bf35549f2c31b7ec.png

Monthly#

# Aggregate the record to monthly means. This rebinds st_data, so later cells
# see the monthly series rather than the original one.
st_data = st_data.resample('M').mean()
nevents = 10
# Rows with the highest TMAX, largest first.
top_10 = st_data.sort_values(by='TMAX', ascending=False).head(nevents)
dict_plot = [{'data': st_data, 'var': 'TMAX', 'ax': 1, 'label': 'TMAX'}]
# The label is derived from nevents so it stays correct if the count changes.
scatter_dict = [{'data': top_10, 'var': 'TMAX', 'ax': 1, 'label': f'Top {nevents} Events'}]

fig = plot_timeseries_interactive(dict_plot, trendline=True, scatter_dict=scatter_dict, figsize=(25, 12))
# Table of the selected events, TMAX only, rounded to two decimals.
fig = plot_df_table(np.round(top_10, 2)[['TMAX']])
../../../_images/107a1fe7b28147ce978c1744b57c3b4a74a507b11cafb57ca588942cde27ea5f.png

Annual#

# Aggregate st_data to annual means and rebind it.
# NOTE(review): if the Monthly cell ran first, st_data already holds monthly
# means, so these are means of monthly means rather than of the underlying
# record. Confirm that this is intended.
st_data = st_data.resample('Y').mean()
# Number of events to highlight; drives both the selection and the label.
nevents = 10
top_10 = st_data.sort_values(by='TMAX', ascending=False).head(nevents)
dict_plot = [{'data': st_data, 'var': 'TMAX', 'ax': 1, 'label': 'TMAX'}]
scatter_dict = [{'data': top_10, 'var': 'TMAX', 'ax': 1, 'label': f'Top {nevents} Events'}]
fig = plot_timeseries_interactive(dict_plot, trendline=True, scatter_dict=scatter_dict, figsize=(25, 12))
# Table of the selected events, TMAX only, rounded to two decimals.
fig = plot_df_table(np.round(top_10, 2)[['TMAX']])
../../../_images/be773ae9b896417a945a65f46ba93cd3ff1357fabe3f43df95e4e974d3f88914.png
# Display st_data as left by the annual resample above.
st_data
TMIN TMAX diff TMEAN
DATE
1951-12-31 24.150806 30.923638 6.772832 27.537222
1952-12-31 23.838715 30.904048 7.065333 27.371381
1953-12-31 24.084018 31.039644 6.955627 27.561831
1954-12-31 24.978473 31.087915 6.109442 28.033194
1955-12-31 23.757035 30.740156 6.983121 27.248596
... ... ... ... ...
2021-12-31 24.750006 28.620488 3.870482 26.685247
2022-12-31 24.567689 29.918439 5.350750 27.243064
2023-12-31 25.550660 30.044822 4.494161 27.797741
2024-12-31 25.849527 30.151326 4.301799 28.000426
2025-12-31 25.641176 30.170588 4.529412 27.905882

75 rows × 4 columns